# Final Exhibit - functions for tables 2-3 --------------------------------------
# Row-share weights per rounded predicted-probability bucket.
#
# Args:
#   dt:        data.table containing the columns `DMG_died_within_365d` and
#              `prob_for_report`; every row must satisfy
#              `DMG_died_within_365d == "1"`.
#   agg_level: numeric strictly between 0 and 1; passed to plyr::round_any()
#              as the accuracy, with `ceiling` as the rounding function.
#
# Returns:
#   A data.table with one row per bucket: `round_prob` (the rounded
#   `prob_for_report`) and `w` (number of rows in the bucket divided by the
#   number of rows in `dt`).
get_weights_by_phat_byN <- function(dt,
                                    agg_level) {
  # Validate inputs; the first condition that is not all TRUE aborts.
  stopifnot(
    c("DMG_died_within_365d", "prob_for_report") %in% names(dt),
    nrow(dt[DMG_died_within_365d == "1"]) == nrow(dt),
    is.numeric(agg_level) & agg_level > 0 & agg_level < 1
  )

  # Fraction of all rows that falls into each rounded-probability bucket.
  dt[
    ,
    .(w = .N / nrow(dt)),
    by = .(round_prob = plyr::round_any(prob_for_report, agg_level, ceiling))
  ]
}

# Weights per rounded predicted-probability bucket, proportional to the
# bucket's share of a numeric column.
#
# Args:
#   dt:        data.table containing the columns `DMG_died_within_365d`,
#              `prob_for_report` and the column named by `w_var`; every row
#              must satisfy `DMG_died_within_365d == "1"`.
#   agg_level: numeric strictly between 0 and 1; passed to plyr::round_any()
#              as the accuracy, with `ceiling` as the rounding function.
#   w_var:     name (single string) of the numeric column whose sum defines
#              the weights. Defaults to "num_days_lived".
#   col_name:  name (single string) given to the weight column in the result.
#              Defaults to "w".
#
# Returns:
#   A data.table with one row per bucket: `round_prob` (the rounded
#   `prob_for_report`) and the weight column, equal to the bucket's sum of
#   `w_var` divided by the sum of `w_var` over all rows of `dt`.
get_weights_by_phat_adj <- function(dt,
                                    agg_level,
                                    w_var = "num_days_lived",
                                    col_name = "w") {
  # check input parameters
  stopifnot(is.character(w_var), length(w_var) == 1L)
  stopifnot(is.character(col_name), length(col_name) == 1L)
  stopifnot(c("DMG_died_within_365d", "prob_for_report", w_var) %in% names(dt))
  stopifnot(nrow(dt[DMG_died_within_365d == "1"]) == nrow(dt))
  stopifnot(is.numeric(agg_level), agg_level > 0, agg_level < 1)

  # Grand total of the weighting column. `[[` looks the column up by its
  # exact name, so this also works for names that are not valid R symbols
  # (which the previous eval(parse(text = ...)) construction could not parse).
  tot <- sum(dt[[w_var]])

  # Share of the grand total contributed by each rounded-probability bucket.
  weights <-
    dt[, .(w = sum(get(w_var)) / tot),
       by = .(round_prob = plyr::round_any(prob_for_report, agg_level, ceiling))]
  data.table::setnames(weights, "w", col_name)
  return(weights)
}

# Build the admissions-statistics exhibit (Table 3 / Table A10), write it as a
# LaTeX file and return the table body.
#
# Three panels are computed, each for the rows "any_adm", "Low" and "High"
# (intensity is "Low" when `profession` is in `low_intensity`, else "High"):
#   * adm_rate: share of rows of `dt_exh` with at least one matching event;
#   * adm_num:  events per 31 days lived, i.e. sum(events) /
#               sum(num_days_lived) * 31, among rows with at least one event;
#   * adm_LOS:  sum(amount) / sum(N), where `amount` is the number of days
#               between `cost_date` and `event_date_end` (spans above 365 days
#               are counted as 1).
# For each panel three values are produced: column "0" (rows with
# DMG_died_within_365d == "0", unweighted), "V1" (the same rows reweighted by
# rounded `prob_for_report` bucket using weights computed on the
# DMG_died_within_365d == "1" rows) and "1" (DMG_died_within_365d == "1"
# rows). Every panel contains a QA step that re-applies the weights to the
# "1" rows and stops if the unweighted value is not reproduced.
#
# Everything is computed twice, for drop_last_adm = TRUE and FALSE. When TRUE,
# events of rows with DMG_died_within_365d != "0" are kept only if
# `event_date_end` precedes `DMG_date_of_death_XX` by more than `days_int`
# days. Only the FALSE run (plus a derived `diff`) reaches the returned table.
#
# NOTE: `round_par` and `days_int` are NOT parameters; they are looked up in
# the enclosing environment (or, for `days_int`, possibly as a column of
# `dt_cost`) and must exist when the function is called. The default values of
# `dt_exh` and `dt_cost` are likewise objects from the enclosing environment.
#
# Args:
#   dt_exh:        data.table with (at least) id_var, S_index_date_XX,
#                  S_sample_source_XX, DMG_died_within_365d, prob_for_report
#                  and num_days_lived.
#   dt_cost:       data.table of events with (at least) id_var,
#                  S_index_date_XX, S_sample_source_XX, main_cat, cost_date,
#                  event_date_end, profession, DMG_died_within_365d and
#                  DMG_date_of_death_XX.
#   sample_n:      label stored in the `sample` column and used in the output
#                  file name.
#   low_intensity: values of `profession` classified as "Low" intensity.
#
# Returns:
#   data.table with columns `0_FALSE`, `V1_FALSE`, `1_FALSE` and `diff`
#   (= `1_FALSE` - `V1_FALSE`, rounded to 3 decimals), one row per
#   panel x variable.
#
# Side effects:
#   Writes "adm_stat_agg_<sample_n>.tex" to the working directory through
#   Hmisc::latex().
do_table_3 <- function(dt_exh = dt_for_exhibits_cancer,
                       dt_cost = dt_cost_for_exhibits_cancer,
                       sample_n = "cancer",
                       low_intensity = c("oncology","internal_medicine",
                                     "geriatry","rehabilitation")) {
  # V  Final Exhibit - Table 3 table A10 : Admissions statistics ---------------
  tab_adm_rate_agg <- NULL
  tab_adm_num_agg <- NULL
  tab_adm_LOS_agg <- NULL

  for (drop_last_adm in c(TRUE, FALSE)) {

    low_inten <- low_intensity

    # Additional analyses - Admission rates - agg ------------------------------
    tab_adm_rate_agg_temp <- NULL

    # One row per dt_exh row, with logical flags Low / High telling whether at
    # least one matching event of that intensity exists.
    dt_temp_adm_any <- merge(
      x = dt_exh[, .(id_var, S_index_date_XX, S_sample_source_XX,
                     DMG_died_within_365d, prob_for_report)],
      y = data.table::dcast.data.table(
        data = dt_cost[main_cat %in% c("Inpatient_Planned", "Inpatient_Unplanned", "Low", "High") &
                         cost_date >= S_index_date_XX &
                         (drop_last_adm == FALSE | DMG_died_within_365d == "0" |
                            difftime(DMG_date_of_death_XX, event_date_end, units = "days") > days_int),
                       .(exist = (.N > 0)),
                       by = .(id_var,
                              S_index_date_XX,
                              S_sample_source_XX,
                              intensity = factor(ifelse(profession %in% low_inten,
                                                        "Low", "High")))],
        formula = id_var + S_index_date_XX + S_sample_source_XX ~ intensity,
        value.var = "exist",
        fill = FALSE
      ),
      by = c("id_var", "S_index_date_XX", "S_sample_source_XX"),
      all.x = TRUE,
      all.y = FALSE
    )
    # Rows of dt_exh without any event come out of the left join as NA -> FALSE
    cols_NA2F_agg <- grep(("Low|High"),
                          names(dt_temp_adm_any),
                          value = TRUE)
    dt_temp_adm_any[, (cols_NA2F_agg) := lapply(.SD, function(x) {ifelse(is.na(x), FALSE, x)}),
                    .SDcols = cols_NA2F_agg]
    dt_temp_adm_any[, any_adm := (Low == TRUE | High == TRUE)]

    # Attach the bucket weights computed on the DMG_died_within_365d == "1" rows
    dt_temp_adm_any_w <- merge(
      x = dt_temp_adm_any[, round_prob := plyr::round_any(prob_for_report, round_par, ceiling)],
      y = get_weights_by_phat_byN(dt_exh[DMG_died_within_365d == "1"], round_par),
      by = "round_prob",
      all.x = TRUE,
      all.y = FALSE
    )

    cols_agg <- c("Low", "High", "any_adm")
    tab_temp <- merge(
      # unweighted data
      x = data.table::dcast.data.table(data.table::melt.data.table(
        data = dt_temp_adm_any_w[, lapply(.SD, mean),
                                 by = .(DMG_died_within_365d),
                                 .SDcols = cols_agg],
        id.vars = "DMG_died_within_365d"
      ), formula = variable ~ DMG_died_within_365d),
      # weighted data
      y = data.table::as.data.table(t(
        dt_temp_adm_any_w[DMG_died_within_365d == "0",
                          lapply(.SD, mean),
                          by = .(round_prob, w),
                          .SDcols = cols_agg][
                            , lapply(.SD, function(x) {sum(x * w) / sum(w)}),
                            .SDcols = cols_agg
                            ]
      ), keep.rownames = "variable"),
      by = "variable",
      all = TRUE
    )

    # QA -----------------d
    # no NA
    stopifnot(sum(is.na(tab_temp)) == 0)
    # weighting works: applying the weights to the "1" rows must give back
    # their unweighted value
    qa_tab <- merge(
      # unweighted data
      x = data.table::dcast.data.table(data.table::melt.data.table(
        data = dt_temp_adm_any_w[, lapply(.SD, mean),
                                 by = .(DMG_died_within_365d),
                                 .SDcols = cols_agg],
        id.vars = "DMG_died_within_365d"
      ), formula = variable ~ DMG_died_within_365d),
      # weighted data
      y = data.table::as.data.table(t(
        dt_temp_adm_any_w[DMG_died_within_365d == "1",
                          lapply(.SD, mean),
                          by = .(round_prob, w),
                          .SDcols = cols_agg][
                            , lapply(.SD, function(x) {sum(x * w) / sum(w)}),
                            .SDcols = cols_agg
                            ]
      ), keep.rownames = "variable"),
      by = "variable",
      all = TRUE
    )[, eq := (round(V1, 5) == round(`1`, 5))]
    stopifnot(nrow(qa_tab[eq == TRUE]) == nrow(qa_tab))
    rm(qa_tab)
    # QA ends ------------d

    tab_adm_rate_agg_temp <- rbind(
      tab_adm_rate_agg_temp,
      tab_temp[, sample := sample_n]
    )
    rm(dt_temp_adm_any, dt_temp_adm_any_w, cols_agg, tab_temp)

    tab_adm_rate_agg_temp[, variable := factor(variable,
                                               levels = c("any_adm", "Low", "High"))]
    data.table::setcolorder(tab_adm_rate_agg_temp, c("sample", "variable", "0", "V1", "1"))
    data.table::setorder(tab_adm_rate_agg_temp, sample, variable)
    tab_adm_rate_agg_temp[, c("0", "V1", "1") := lapply(.SD, round, 3), .SDcols = c("0", "V1", "1")]
    tab_adm_rate_agg_temp[, last_adm_dropped := drop_last_adm]

    tab_adm_rate_agg <- rbind(
      tab_adm_rate_agg,
      tab_adm_rate_agg_temp
    )
    rm(tab_adm_rate_agg_temp)

    # Admission numbers - agg --------------------------------------------------
    tab_adm_num_agg_temp <- NULL

    # One row per id with at least one matching event, with event counts per
    # intensity (Low / High) and their sum (any_adm).
    dt_temp_adm_any <- merge(
      x = dt_exh[, .(id_var, S_index_date_XX, S_sample_source_XX,
                     DMG_died_within_365d, prob_for_report, num_days_lived)],
      y = data.table::dcast.data.table(
        data = dt_cost[main_cat %in% c("Inpatient_Planned", "Inpatient_Unplanned", "Low", "High") &
                         cost_date >= S_index_date_XX &
                         (drop_last_adm == FALSE | DMG_died_within_365d == "0" |
                            difftime(DMG_date_of_death_XX, event_date_end, units = "days") > days_int),
                       .N,
                       by = .(id_var,
                              S_index_date_XX,
                              S_sample_source_XX,
                              intensity = factor(ifelse(profession %in% low_inten,
                                                        "Low", "High")))],
        formula = id_var + S_index_date_XX + S_sample_source_XX ~ intensity,
        value.var = "N",
        fill = 0
      ),
      by = c("id_var", "S_index_date_XX", "S_sample_source_XX"),
      all.x = FALSE, # keep only those with admissions
      all.y = TRUE   # keep only those with admissions
    )[, any_adm := Low + High]

    # QA ----------------d
    stopifnot(nrow(dt_temp_adm_any[any_adm == 0 | is.na(any_adm)]) == 0)
    # QA ends -----------d

    # add weights (bucket share of num_days_lived among the "1" rows)
    dt_temp_adm_any_w <- merge(
      x = dt_temp_adm_any[, round_prob := plyr::round_any(prob_for_report, round_par, ceiling)],
      y = get_weights_by_phat_adj(dt_temp_adm_any[DMG_died_within_365d == "1"], round_par, "num_days_lived"),
      by = "round_prob",
      all.x = TRUE,
      all.y = FALSE
    )

    cols_agg <- c("Low", "High", "any_adm")
    tab_temp <- merge(
      # unweighted, yet adjusted, data
      x = data.table::dcast.data.table(data.table::melt.data.table(
        data = dt_temp_adm_any_w[, lapply(.SD, function(x) {(sum(x) / sum(num_days_lived)) * 31}),
                                 by = .(DMG_died_within_365d),
                                 .SDcols = cols_agg],
        id.vars = "DMG_died_within_365d"
      ), formula = variable ~ DMG_died_within_365d),
      # weighted data
      y = data.table::as.data.table(t(
        dt_temp_adm_any_w[DMG_died_within_365d == "0",
                          lapply(.SD, function(x) {(sum(x) / sum(num_days_lived)) * 31}),
                          by = .(round_prob, w),
                          .SDcols = cols_agg][
                            , lapply(.SD, function(x) {sum(x * w) / sum(w)}),
                            .SDcols = cols_agg
                            ]
      ), keep.rownames = "variable"),
      by = "variable",
      all = TRUE
    )
    # QA -----------------d
    # no NA
    stopifnot(sum(is.na(tab_temp)) == 0)
    # weighting works
    qa_tab <- merge(
      # unweighted, yet adjusted, data
      x = data.table::dcast.data.table(data.table::melt.data.table(
        data = dt_temp_adm_any_w[, lapply(.SD, function(x) {(sum(x) / sum(num_days_lived)) * 31}),
                                 by = .(DMG_died_within_365d),
                                 .SDcols = cols_agg],
        id.vars = "DMG_died_within_365d"
      ), formula = variable ~ DMG_died_within_365d),
      # weighted data
      y = data.table::as.data.table(t(
        dt_temp_adm_any_w[DMG_died_within_365d == "1",
                          lapply(.SD, function(x) {(sum(x) / sum(num_days_lived)) * 31}),
                          by = .(round_prob, w),
                          .SDcols = cols_agg][
                            , lapply(.SD, function(x) {sum(x * w) / sum(w)}),
                            .SDcols = cols_agg
                            ]
      ), keep.rownames = "variable"),
      by = "variable",
      all = TRUE
    )[, eq := (round(V1, 5) == round(`1`, 5))]
    stopifnot(nrow(qa_tab[eq == TRUE]) == nrow(qa_tab))
    rm(qa_tab)
    # QA ends ------------d

    tab_adm_num_agg_temp <- rbind(
      tab_adm_num_agg_temp,
      tab_temp[, sample := sample_n]
    )
    rm(dt_temp_adm_any, dt_temp_adm_any_w, cols_agg, tab_temp)

    tab_adm_num_agg_temp[, variable := factor(variable,
                                              levels = c("any_adm", "Low", "High"))]
    data.table::setcolorder(tab_adm_num_agg_temp, c("sample", "variable", "0", "V1", "1"))
    data.table::setorder(tab_adm_num_agg_temp, sample, variable)
    tab_adm_num_agg_temp[, c("0", "V1", "1") := lapply(.SD, round, 3), .SDcols = c("0", "V1", "1")]
    tab_adm_num_agg_temp[, last_adm_dropped := drop_last_adm]

    tab_adm_num_agg <- rbind(
      tab_adm_num_agg,
      tab_adm_num_agg_temp
    )
    rm(tab_adm_num_agg_temp)

    # Additional analyses - Admission LOS - agg --------------------------------
    tab_adm_LOS_agg_temp <- NULL

    # One row per id with at least one matching event, with per-intensity event
    # counts (N_*) and summed day spans (amount_*). A span longer than 365 days
    # is counted as 1 day.
    dt_temp_adm_any <- merge(
      x = dt_exh[, .(id_var, S_index_date_XX, S_sample_source_XX,
                     DMG_died_within_365d, prob_for_report, num_days_lived)],
      y = data.table::dcast.data.table(
        data = dt_cost[main_cat %in% c("Inpatient_Planned", "Inpatient_Unplanned", "Low", "High") &
                         cost_date >= S_index_date_XX &
                         (drop_last_adm == FALSE | DMG_died_within_365d == "0" |
                            difftime(DMG_date_of_death_XX, event_date_end, units = "days") > days_int),
                       .(amount = sum(ifelse(as.integer(difftime(event_date_end, cost_date, units = "days")) > 365,
                                             as.integer(1),
                                             as.integer(difftime(event_date_end, cost_date, units = "days"))
                       )
                       ),
                       .N),
                       by = .(id_var,
                              S_index_date_XX,
                              S_sample_source_XX,
                              intensity = factor(ifelse(profession %in% low_inten,
                                                        "Low", "High")))],
        formula = id_var + S_index_date_XX + S_sample_source_XX ~ intensity,
        value.var = c("N", "amount"),
        fill = 0
      ),
      by = c("id_var", "S_index_date_XX", "S_sample_source_XX"),
      all.x = FALSE, # keep only those with admissions
      all.y = TRUE   # keep only those with admissions
    )[, `:=` (
      N_any_adm = N_High + N_Low,
      amount_any_adm = amount_High + amount_Low
    )]

    # QA ----------------d
    stopifnot(nrow(dt_temp_adm_any[N_any_adm == 0 | is.na(N_any_adm)]) == 0)
    stopifnot(nrow(dt_temp_adm_any[is.na(amount_High)]) == 0)
    # QA ends -----------d

    tab <- NULL
    tab_qa <- NULL
    for (varn in c("any_adm", "Low", "High")) {
      amnt <- paste0("amount_", varn)
      nAdm <- paste0("N_", varn)
      # Keep only rows with at least one admission of this type.
      # Bug fix: the previous `dt_temp_adm_any[nAdm > 0]` compared the column
      # *name* (a string) with 0, which is always TRUE, so nothing was
      # filtered. Buckets whose survivors had no admission of this type then
      # produced 0/0 = NaN, which was dropped from the numerator of the
      # weighted mean while their weight stayed in the denominator.
      dt_temp <- dt_temp_adm_any[get(nAdm) > 0]
      unweighted <- data.table::dcast.data.table(
        data = dt_temp[, .(variable = varn, V1 = sum(get(amnt), na.rm = TRUE) /
                             sum(get(nAdm), na.rm = TRUE)),
                       by = .(DMG_died_within_365d)],
        formula = variable ~ DMG_died_within_365d,
        value.var = "V1"
      )
      # Weights are taken from all "1" rows with any admission (not only those
      # with this admission type): buckets where they have none of this type
      # get weight 0, exactly as before the filter fix, instead of a missing
      # weight that would trip the NA check below.
      dt_temp_w <- merge(
        x = dt_temp[, round_prob := plyr::round_any(prob_for_report, round_par, ceiling)],
        y = get_weights_by_phat_adj(dt_temp_adm_any[DMG_died_within_365d == "1"], round_par, nAdm),
        by = "round_prob",
        all.x = TRUE,
        all.y = FALSE
      )
      weighted <- dt_temp_w[DMG_died_within_365d == "0",
                            .(V1 = sum(get(amnt)) / sum(get(nAdm))),
                            by = .(round_prob, w)][, .(V1 = sum(V1 * w, na.rm = TRUE) / sum(w))]
      tab <- rbind(
        tab,
        cbind(unweighted, weighted)
      )

      weighted_qa <- dt_temp_w[DMG_died_within_365d == "1",
                               .(V1 = sum(get(amnt)) / sum(get(nAdm))),
                               by = .(round_prob, w)][, sum(V1 * w, na.rm = TRUE) / sum(w)]
      tab_qa <- rbind(
        tab_qa,
        cbind(unweighted, weighted_qa)
      )
      rm(unweighted, weighted, weighted_qa, dt_temp, dt_temp_w, amnt, nAdm, varn)
    }

    # QA -----------------d
    # no NA
    stopifnot(sum(is.na(tab)) == 0)
    # weighting works
    stopifnot(all(round(tab_qa$`1`, 5) == round(tab_qa$weighted_qa, 5)))
    rm(tab_qa)
    # QA ends ------------d

    tab_adm_LOS_agg_temp <- rbind(
      tab_adm_LOS_agg_temp,
      tab[, sample := sample_n]
    )
    rm(dt_temp_adm_any, tab)

    tab_adm_LOS_agg_temp[, variable := factor(variable,
                                              levels = c("any_adm", "Low", "High"))]
    data.table::setcolorder(tab_adm_LOS_agg_temp, c("sample", "variable", "0", "V1", "1"))
    data.table::setorder(tab_adm_LOS_agg_temp, sample, variable)
    tab_adm_LOS_agg_temp[, c("0", "V1", "1") := lapply(.SD, round, 3),
                         .SDcols = c("0", "V1", "1")]
    tab_adm_LOS_agg_temp[, last_adm_dropped := drop_last_adm]

    tab_adm_LOS_agg <- rbind(
      tab_adm_LOS_agg,
      tab_adm_LOS_agg_temp
    )
  }

  # Stack the three panels; spread the two drop_last_adm runs into columns
  # suffixed _TRUE / _FALSE and keep the ones used for the exhibit.
  full_tab_3 <- rbind(
    data.table::dcast.data.table(data = tab_adm_rate_agg,
                                 formula = sample + variable ~ last_adm_dropped,
                                 value.var = c("0", "V1", "1"))[
                                   , .(sample, variable, `0_FALSE`, `V1_FALSE`,
                                       `1_FALSE`, `1_TRUE`, type = "adm_rate")
                                   ],
    data.table::dcast.data.table(data = tab_adm_num_agg,
                                 formula = sample + variable ~ last_adm_dropped,
                                 value.var = c("0", "V1", "1"))[
                                   , .(sample, variable, `0_FALSE`, `V1_FALSE`,
                                       `1_FALSE`, `1_TRUE`, type = "adm_num")
                                   ],
    data.table::dcast.data.table(data = tab_adm_LOS_agg,
                                 formula = sample + variable ~ last_adm_dropped,
                                 value.var = c("0", "V1", "1"))[
                                   , .(sample, variable, `0_FALSE`, `V1_FALSE`,
                                       `1_FALSE`, `1_TRUE`, type = "adm_LOS")
                                   ]
  )
  full_tab_3[variable == "any_adm", var_name := "All"]
  full_tab_3[variable == "Low", var_name := "   Low Intensity"]
  full_tab_3[variable == "High", var_name := "   High Intensity"]
  data.table::setcolorder(full_tab_3, c("var_name", "0_FALSE", "V1_FALSE", "1_FALSE",
                                        "1_TRUE", "sample", "variable", "type"))

  # Column (4): "1" rows minus reweighted "0" rows
  full_tab_3$diff <- round(full_tab_3$`1_FALSE` - full_tab_3$`V1_FALSE`, 3)

  # Columns dropped by position: 1 = var_name (used as row names instead),
  # 5 = 1_TRUE, 6 = sample, 7 = variable, 8 = type.
  invisible(Hmisc::latex(
    full_tab_3[sample == sample_n, -c(1, 5, 6, 7, 8)],
    file = paste0("adm_stat_agg_", sample_n, ".tex"),
    center = 'centering',
    n.cgroup = c(2, 1, 1),
    cgroup = c("Survivor", "Decedent", "Difference"),
    colheads = c("Unweighted",
                 "\\thead{Reweighted by\\\\Decedent Risk}",
                 "All Admissions",
                 "\\thead{Decedent -\\\\ Survivor\\\\(Reweighted)}"),
    extracolheads = c("(1)", "(2)", "(3)", "(4)"),
    n.rgroup = c(3, 3, 3),
    rgroup = c("A. Any Admission",
               "\\thead{B. Admissions per Month\\\\(if Any During the Year)}",
               "C. Length of Stay (Days)"),
    rowname = full_tab_3[sample == sample_n]$var_name,
    rowlabel = "",
    col.just = c(rep.int("r", 4)),
    na.blank = TRUE,
    extracolsize = "normalsize"
  ))

  return(full_tab_3[sample == sample_n, -c(1, 5, 6, 7, 8)])
}
